###############################################################################
## Simulation by calibrating from 1999 estimates -- II. Simulate for S times ##
## Model refers to "Distribution Regression with Selection"
## Authors: Chernozhukov, V., I. Fernandez-Val and S. Luo
## Last Update: 6/17/2018



rm(list=ls());

library(parallel);
library(sampleSelection);
library(maxLik);
library(mvtnorm);
library(abind);
library(MASS);

#####################
# Extract Arguments #
#####################
#Define default values for variables
counter.method <- 2; 
# 1 for counterfactual decades within gender; 
# 2 for counterfactual sex within time period; 
# 3 for counterfactual 18 years within gender;
sample.id      <- 113104;



str.sample <- ifelse(counter.method==1, ifelse(sample.id==1, "_m","_f"), 
                     ifelse(counter.method==2, paste("_",sample.id,sep=""), ifelse(sample.id==1, "_mhalf","_fhalf")));


counter.id <- ifelse(counter.method==1, "d80100", ifelse(counter.method==2,"sex","half"));


#######################################
# Import data from a proper directory #
#######################################
mydata <- read.table("drs_fesdata.csv", header=TRUE, sep= ",");
mydata$age2 <- mydata$age2/100;
mydata$age3 <- mydata$age3/1000;
mydata$age4 <- mydata$age4/10000;

mydata$agep  <- poly(mydata$age,degree=4)[,1]*100;
mydata$agep2 <- poly(mydata$age,degree=4)[,2]*100;
mydata$agep3 <- poly(mydata$age,degree=4)[,3]*100;
mydata$agep4 <- poly(mydata$age,degree=4)[,4]*100;
mydata$lw[mydata$work==0] <- NA;

mydata$d80100 <- 0;
mydata$d80100[mydata$decade == 80]  <- 1;
mydata$d80100[mydata$decade == 100] <- 2;

mydata$half <- 0;
mydata$half[mydata$year >= 96] <- 1;
mydata$half[mydata$year <= 95] <- 2;

mydata$six_d <-1;
mydata$six_d[mydata$year>=84 & mydata$year<=89] <- 2; 
mydata$six_d[mydata$year>=90 & mydata$year<=95] <- 3; 
mydata$six_d[mydata$year>=96 & mydata$year<=101] <- 4; 
mydata$six_d[mydata$year>=102 & mydata$year<=107] <- 5; 
mydata$six_d[mydata$year>=108 & mydata$year<=113] <- 6; 


if(counter.method == 1){
  outcome.formula <- lw ~ educ16 + educ1718 + educ1920 + educ2122 + educ23 + couple + agep + agep2 + agep3 + agep4 + 
    factor(region) + numch1 + numch2 + numch34 + numch510 + numch1116 + numch1718 + factor(t_d);
}else if(counter.method == 3){
  outcome.formula <- lw ~ educ16 + educ1718 + educ1920 + educ2122 + educ23 + couple + agep + agep2 + agep3 + agep4 + 
    factor(region) + numch1 + numch2 + numch34 + numch510 + numch1116 + numch1718 + factor(half_d);
}else if(sample.id>=1000){
  outcome.formula <- lw ~ educ16 + educ1718 + educ1920 + educ2122 + educ23 + couple + agep + agep2 + agep3 + agep4 + 
    factor(region) + numch1 + numch2 + numch34 + numch510 + numch1116 + numch1718 + factor(year);
}else{ # where sample is a single year;
  outcome.formula <- lw ~ educ16 + educ1718 + educ1920 + educ2122 + educ23 + couple + agep + agep2 + agep3 + agep4 + 
    factor(region) + numch1 + numch2 + numch34 + numch510 + numch1116 + numch1718;
};


selection.formula <- update(outcome.formula, work ~ . + tubeninc0 + m_inc0);

rho.formula <- work ~ 1; # dependent variable does not mattrer;


Y.name <- all.vars(outcome.formula)[1];


####################
# Parallel Setting #
####################

# Calculate the namber of cores
n.cores <- as.numeric(Sys.getenv("NSLOTS"));
if(is.na(n.cores))n.cores <- 1;

################################################
# Source the file which contains the functions #
################################################
source("DRS_Source.R");


## Counterfactual Distributions for grouped/individual years ##


# Keep data for one gender for a period (from.year ~ to.year) #
if(counter.method == 1){
  mysample <- mydata[is.element(mydata$year,c(80:89,100:109)) & mydata$sex == sample.id,];
}else if(counter.method == 3){
  mysample <- mydata[mydata$sex == sample.id,];
}else{
  if(sample.id >= 1000){
    if(sample.id >= 100000){
      from.year <- sample.id %/% 1000;
      to.year   <- sample.id - from.year*1000;
    }else{
      from.year <- sample.id %/% 100;
      to.year   <- sample.id - from.year*100;
    };
  }else{
    from.year <- sample.id;
    to.year   <- sample.id;
  }
  mysample <- mydata[is.element(mydata$year,c(from.year:to.year)) & mydata[,counter.id]==2,];
};

####################################
# Set up Threshold being estimated #
####################################
u.thres <- 0.99;
l.thres <- 0.01;
by.thres <- 0.01; 
ys <- quantile(mysample[,Y.name],seq(l.thres,u.thres,by.thres), na.rm=TRUE);
n.thres <- length(ys);


#############################
# Estimate parameters and F #
#############################
# For now, we only focus on simulations for female #
elist.f <- Estimation(mysample, ys, outcome.formula, selection.formula, rho.formula, initial.val=numeric(0));

Zwc.all <- model.matrix(selection.formula, mysample);
Z.name  <- colnames(Zwc.all);
Heckman.f       <- selection(selection=selection.formula, outcome=outcome.formula, method="2-step", data=mysample);
pi.Heckman.f    <- coef(Heckman.f)[1:length(Z.name)];
beta.Heckman.f  <- coef(Heckman.f, part="outcome");
rho.Heckman.f   <- coef(Heckman.f)["rho"];
sigma.Heckman.f <- coef(Heckman.f)["sigma"];


###########################
# Save estimation results #
###########################
save.image(paste("Est_female",str.sample,".RData",sep=""));
